#!/usr/bin/env python
''' Check which C functions are not covered by the test suite and create a report

This script is meant to be run from a trunk/branch root, to gather information
on the coverage of C functions by the test suite. It needs a "make coverage"
build and a run of the test suite in some way.
 
The '-m' command line switch creates a workable dataset by calling
"./configure --with-pydebug" and "make clean coverage test", but it's possible
that functions flagged as not-covered would be exercised by a more complete
run of the test suite.

The '-r' command line switch runs gcov to tally the number of times each 
function is called, then picks those never called and cleans the data to 
create a concise report.

'''

import itertools
import os
import re
import sys
from os import path
from subprocess import Popen, PIPE
from optparse import OptionParser

# Write-only handle on the null device, opened once at import time.
# os.devnull names the right file for the running platform instead of
# assuming the POSIX-only '/dev/null'.
# NOTE(review): nothing in the visible part of this file uses DEVNULL.
DEVNULL = open(os.devnull, 'w')

# Base gcov command line; rungcov() appends the coverage-data directory
# (which lands right after the trailing -o) and then the source file.
#   -n  don't write a .gcov file for each analyzed C file; leave it out to
#       get those per file coverage reports
#   -f  ask for function call summaries
GCOVARGS = ['gcov', '-n', '-f', '-o']

# Function names that funcmatch() leaves out of the report unless it is told
# to check everything (_Py_tok_dump too?)
BOGUS = set([
    '*fstat64', '*fstatat64', '*lstat64', '*stat64',
    '_Py_tok_dump',
    'ffi_prep_args_raw', 'ffi_prep_raw_closure_loc', 'ffi_raw_call',
    'getitem_idx',
    'gnu_dev_major', 'gnu_dev_makedev', 'gnu_dev_minor',
    'indenterror',
    'initialize_aggregate',
    'mknod', 'mknodat',
    'tipc_addr', 'tipc_cluster', 'tipc_node', 'tipc_zone',
])

# Literal pieces of a per-function summary in gcov's output.  Joining them
# with '(.+)' yields a regexp with three groups: the function name, the
# percentage of lines executed and the number that follows '% of '.
# Summaries about files, etc. do not start with "Function '" and so never
# match.  The wording probably depends on the locale.
FUNCTPL = ("Function '", "'\nLines executed:", '% of ', '')
FUNC = re.compile('(.+)'.join(map(re.escape, FUNCTPL)) + '$')


# Text looked for in gcov's stderr; rungcov() records a source file as having
# no coverage data when this shows up
ERROR = 'cannot open graph file'

def matches(patlist):
    ''' Returns a re.match()er for names ending in an item of the pattern list

    patlist is either a string holding comma and/or whitespace separated
    items (e.g. '.svn,.bzr,CVS') or a list/tuple of such strings.  Every item
    is taken literally, never as a regular expression.

    The returned callable takes a name and gives back a match object when the
    name ends with one of the items (a name equal to an item counts too) and
    None otherwise.  An empty pattern list gives a matcher that rejects
    every name.
    '''
    # basestring only exists on Python 2; fall back to str on Python 3
    try:
        string_types = basestring
    except NameError:
        string_types = str
    # Allow passing a tuple or list instead of a string
    if not isinstance(patlist, string_types):
        patlist = ' '.join(patlist)
    # Split on commas and whitespace first and escape each item afterwards,
    # so that escaping can never interfere with the separators
    items = [re.escape(item) for item in patlist.replace(',', ' ').split()]
    if not items:
        # '(?!)' is a negative lookahead that can never succeed
        return re.compile('(?!)').match
    # A group gives real alternation between the items; square brackets would
    # build a character class that matches any single listed character.
    # '.*' (rather than '.+') lets a name that equals an item match as well.
    pattern = '.*(?:%s)$' % '|'.join(items)
    # Returns a .match method, not a re object
    return re.compile(pattern).match

def find(top='./', suffixes='.c', ignore='.svn,.bzr,CVS'):
    ''' Drop-in replacement for Popen(FIND)

    Walks the tree rooted at top and returns a list with the path of every
    file whose name ends in one of suffixes.  Directories whose name matches
    an item of ignore are not descended into.  Both lists are given in the
    format accepted by matches().

    Prints a message and exits with status 2 if top does not exist.
    '''
    if not path.exists(top):
        print('The specified directory does not exist:')
        print(top)
        print('Which points to: %s' % path.abspath(top))
        sys.exit(2)
    include = matches(suffixes)
    exclude = matches(ignore)
    files = []
    join = path.join
    for dirpath, dirs, namelist in os.walk(top):
        files.extend(join(dirpath, name) for name in namelist if include(name))
        # Prune through slice assignment: os.walk only skips directories that
        # are taken out of this very list object, and removing entries while
        # looping over the list would skip the entry after each removal
        dirs[:] = [child for child in dirs if not exclude(child)]
    return files

def funcmatch(funclist, checkall=False, mincov=0):
    ''' Pick the functions whose coverage is at or below mincov

    funclist holds chunks of gcov output; chunks that FUNC does not match
    are skipped.  Unless checkall is true, functions named in BOGUS are
    skipped too.

    Return a list of (function_name, count) tuples, where count is the number
    that follows '% of ' in the chunk.  Both are strings cut from the text.

    '''
    parse = FUNC.match
    uncovered = []
    for chunk in funclist:
        found = parse(chunk)
        if found is None:
            continue
        funcname, executed, total = found.groups()
        if not checkall and funcname in BOGUS:
            continue
        if float(executed) <= mincov:
            uncovered.append((funcname, total))
    return uncovered


def findcov():
    ''' Map C file names to the directory holding their coverage data

    For many (most?) files the coverage data sits next to the source, but
    modules that are included in the python binary have theirs under
    './build/temp*/path/Modules/*'.  Every .gcno file below the current
    directory is therefore looked up and recorded under the name of the
    C file it belongs to.

    Return a dict of {'filename.c': 'path/to/coverage/data'}; when several
    .gcno files share a base name, the last one found wins.

    '''
    covdirs = {}
    for gcno in find(suffixes='.gcno'):
        stem = path.splitext(path.basename(gcno))[0]
        covdirs[stem + '.c'] = path.dirname(gcno)
    return covdirs

def rungcov(covdirs, checkall=False):
    ''' Finds .c files, calls gcov for each one and stores the output

    covdirs maps the base name of a C file to the directory holding its
    coverage data (as built by findcov); for files missing from it, the
    directory of the source file is used.  checkall is handed on to
    funcmatch().

    Returns a dict that maps the path of each source file to the list of
    (function_name, function_length) tuples funcmatch() kept for it.  Files
    for which that list is empty are left out.

    When run as a script, also prints the files for which gcov reported
    that it could not open the graph file.
    '''
    files = find()
    # Each item is {'path/to/filename.c':[(function_name, function_length)]}
    gcov_output = {}
    errors = []
    for entry in files:
        cfile = path.basename(entry)
        # If gcov data not in covdirs, use the source dir
        edir = covdirs.get(cfile, path.dirname(entry))
        gcov = Popen(GCOVARGS + [edir, entry], stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes at the same time and waits for the
        # child.  Reading stderr up to EOF before touching stdout can hang
        # once gcov fills the stdout pipe, and never reaps the process.
        out, err = gcov.communicate()
        if ERROR in err.decode():
            errors.append(entry)
        # gcov separates its summaries with an empty line
        funclist = out.decode().split('\n\n')
        cleanlist = funcmatch(funclist, checkall)
        if cleanlist:
            gcov_output[entry] = cleanlist
    if __name__ == '__main__':
        print('No coverage data was found for these files:')
        print('\n'.join(errors))
    return gcov_output


# Report template printed by listbogus().  The three %s slots are filled, in
# this order, with comma separated lists of: the functions that were hits
# more than once, those of them that are not in BOGUS, and the BOGUS entries
# that were not among them.
BOGUSMSG = '''
These functions were found as hits in more than one file:
%s

Of the above, these are missing from BOGUS:
%s 

These functions are in BOGUS but were not found in this run:
%s

'''


def listbogus(gcov_output):
    ''' List functions never called but present in more than one file

    gcov_output is the dict built by rungcov(): source path mapped to a list
    of (function_name, function_length) tuples.  A function counts as present
    in more than one file when its name shows up more than once over all of
    those lists.

    Prints BOGUSMSG filled with these functions, with those of them that are
    missing from BOGUS and with the BOGUS entries that were not among them.
    Returns nothing.
    '''
    # chain.from_iterable flattens the per-file lists in linear time, while
    # sum(lists, []) copies the growing result once per file
    entries = itertools.chain.from_iterable(gcov_output.values())
    functions = sorted(fun[0] for fun in entries)
    funccount = {}
    # groupby only groups adjacent items, hence the sort above
    for function, grouped in itertools.groupby(functions):
        count = sum(1 for _ in grouped)
        if count > 1:
            funcname = function.split(' ')[0]
            funccount[funcname] = count

    fset = set(funccount)
    missing, notfound = (fset - BOGUS), (BOGUS - fset)
    sets = fset, missing, notfound
    bogusdiff = tuple(', '.join(sorted(diff)) for diff in sets)
    print(BOGUSMSG % bogusdiff)

def write(formated, output='uncovered.txt'):
    ''' Writes the report in the dict formated to the file output

    formated maps a file name to a list of (function_name, count) pairs.
    File names are written in sorted order; each one is followed by a colon,
    then by one 'function_name count' line per pair indented by four spaces,
    then by an empty line.

    output is the path of the report file; an existing file is overwritten.
    '''
    end = '\n'
    tpl = ' ' * 4 + '%s %s\n'
    msg = []
    for filename, lines in sorted(formated.items()):
        msg.append(filename + ':' + end)
        msg.extend(tpl % (name, count) for name, count in lines)
        msg.append(end)
    # The with block closes the file even when writing fails half way
    with open(output, 'w') as out:
        out.writelines(msg)


def main():
    ''' Runs the show: collect data, process and report

    Command line switches (read from sys.argv):
      -m/--make  run ./configure and make, one after the other
      -r/--run   run gcov over the C files and write the report file
      -a/--all   do not filter out the functions listed in BOGUS and, with
                 -r, also print the listbogus() summary
      -g/--gcov  let gcov write its per file .gcov reports

    Prints the usage message when neither -m nor -r is given.  Exits with an
    error message if ./configure fails.
    '''
    desc = 'Find C functions not covered by the test suite.'
    parser = OptionParser(description=desc)
    parser.add_option('-m', '--make',
                      dest='make', action='store_true', default=False,
                      help='configure, compile and run the test suite')

    parser.add_option('-r', '--run',
                      dest='run', action='store_true', default=False,
                      help='analyze coverage data and report to file')

    parser.add_option('-a', '--all',
                      dest='checkall', action='store_true', default=False,
                      help='include "bogus" functions')
    parser.add_option('-g', '--gcov',
                      dest='gcovfiles', action='store_true', default=False,
                      help='create .gcov files')

    options, _args = parser.parse_args()
    nop = True
    if options.make:
        # Err, also makes sure you have enough data
        # Wait for each step: without it make would start while configure
        # is still running, and -r would analyze a build still in progress
        status = Popen(['./configure', '--with-pydebug']).wait()
        if status != 0:
            sys.exit('configure failed with exit status %d' % status)
        # make's exit status is ignored on purpose; presumably a test suite
        # with failures still leaves usable coverage data -- TODO confirm
        Popen(['make', 'clean', 'coverage', 'test']).wait()
        nop = False
    checkall = options.checkall
    if options.gcovfiles:
        # Drop -n by value, so that a second call cannot remove another flag
        if '-n' in GCOVARGS:
            GCOVARGS.remove('-n')
    if options.run:
        covdirs = findcov()
        gcov_output = rungcov(covdirs, checkall)
        write(gcov_output)
        if checkall:
            listbogus(gcov_output)
        nop = False
    if nop:
        parser.print_help()

# Run the command line interface only when executed as a script, so that
# importing this module does not parse arguments or start any process
if __name__ == '__main__':
    main()